#### setting environment ####
require(quanteda)
require(stm)
require(fanc)
require(psych)
require(RColorBrewer)
red.colors <- colorRamp(brewer.pal(9, "Reds")[c(2, 7)], space = "rgb")

newspaper.names <- c("Asahi", "Chugoku", "Chunichi", "Hokkaido", "Kahoku", 
                     "Mainichi", "Nikkei", "Nishinippon", "Sankei", "Yomiuri")

#### data preparation ####
# document-feature matrix of newspaper editorials published from October 1, 2017 to September 30, 2018
load("Study2_dfm.Rdata")
# estimation results of the 65-topic model
load("STM_result_65.Rdata")
# estimation results of the factor analysis models
load("FA_result.Rdata")

#### estimates by the unidimensional factor analysis model ####
## newspapers' ideal points
# posterior draws
Study2.ideal.points.draws <- t(rbind(FA.result$mcmc[[1]][, 1:10], 
                                     FA.result$mcmc[[2]][, 1:10], 
                                     FA.result$mcmc[[3]][, 1:10])) * 10
# point estimates (posterior means)
Study2.ideal.points.mean <- rowMeans(Study2.ideal.points.draws)

## factor loadings
# posterior draws
factor.loadings.draws <- t(rbind(FA.result$mcmc[[1]][, 11:75], 
                                 FA.result$mcmc[[2]][, 11:75], 
                                 FA.result$mcmc[[3]][, 11:75])) / 10
# point estimates (posterior means)
factor.loadings.mean <- rowMeans(factor.loadings.draws)
# descending order of the point estimates
factor.loadings.order <- order(abs(factor.loadings.mean), decreasing = TRUE)

#### each newspaper's topic distribution (Figure A.11) ####
## extract topic distributions from the stm object
topic.prop.matrix <- matrix(NA, 10, 65)
rownames(topic.prop.matrix) <- newspaper.names
for (i in 1:10) {
  topic.prop.matrix[i, ] <- colMeans(STM.result$theta[Study2.dfm@docvars$newspaper == newspaper.names[i], ])
}
topic.prop.matrix.reordered <- topic.prop.matrix[, factor.loadings.order]

## draw the figure
cairo_pdf("Figure_A11.pdf", 
          width = 6, height = 3, pointsize = 6, family = "Helvetica")
par(mar = c(0, 0, 0, 0))
plot(NULL, NULL, type = "n", xlim = c(-4, 71), ylim = c(0, 10.5), xlab = "", ylab = "", axes = FALSE)
for (i in 1:10) {
  for (j in 1:65) {
    polygon(c(j - 1 + 0.05, j - 1 + 0.05, j - 0.05, j - 0.05) + 0.5 * floor((j - 1) / 10), 
            c(i - 1 + 0.05, i - 0.05, i - 0.05, i - 1 + 0.05), border = NA, 
            col = rgb(red.colors(log(1 + topic.prop.matrix.reordered[11 - i, j]) / log(max(1 + topic.prop.matrix.reordered))) / 255))
  }
}
for (i in 1:100) {
  polygon(c(69.5, 69.5, 70.5, 70.5), (c(i - 1, i, i, i - 1) * 0.01) * 10, 
          border = rgb(red.colors(i * 0.01) / 255),
          col = rgb(red.colors(i * 0.01) / 255))
}
text(rep(70.5, 6), 
     c(0, 10 * log(1.005) / log(max(1 + topic.prop.matrix.reordered)), 
       10 * log(1.01) / log(max(1 + topic.prop.matrix.reordered)), 
       10 * log(1.02) / log(max(1 + topic.prop.matrix.reordered)), 
       10 * log(1.05) / log(max(1 + topic.prop.matrix.reordered)), 10), 
     c("0.000", "0.005", "0.010", "0.020", "0.050", round(max(topic.prop.matrix.reordered), 3)), pos = 4, cex = 0.8)
text(rep(-6, 10), 10:1 - 0.5, newspaper.names, pos = 4, cex = 0.8)
text(seq(1, 65, 10.5) - 0.5,  rep(10, length(seq(1, 65, 10))), 
     seq(1, 65, 10), pos = 3, cex = 0.8)
dev.off()

#### estimating the multidimensional factor analysis model ####
## estimating the model varing initial values
# CAUTION: it may take a long time to complete this procedure depending on the computational environment
# you may skip this procedure and load a .Rdata file containing the estimation results
gatekeeping.FA.results <- list()
for (i in 1:30) {
  set.seed(100 * i)
  gatekeeping.FA.results[[i]] <- fanc(topic.prop.matrix, factors = 9)
  print(paste0("Estimation of model ", i, " is finished at ", date(), "."))
}
# save(gatekeeping.FA.results, file = "gatekeeping_FA_results.Rdata")
# load("gatekeeping_FA_results.Rdata")

## selecting the best model by the Bayesian information criterion
select.gatekeeping.FA.results <- list()
gatekeeping.FA.results.BIC <- matrix(NA, 30, 9)
for (i in 1:30) {
  select.gatekeeping.FA.results[[i]] <- list()
  for (j in 1:length(gatekeeping.FA.results[[i]]$gamma)) {
    select.gatekeeping.FA.results[[i]][[j]] <- select(gatekeeping.FA.results[[i]], 
                                                      gamma = gatekeeping.FA.results[[i]]$gamma[j])
    gatekeeping.FA.results.BIC[i, j] <- select.gatekeeping.FA.results[[i]][[j]]$BIC
  }
}
gatekeeping.FA.results.BIC
which.min(gatekeeping.FA.results.BIC)

#### compare the estimated factor loadings scores with the results of the unidimensional model (Figure A.3) ####
## factor loadings
gatekeeping.FA.factor.loadings <- select.gatekeeping.FA.results[[30]][[9]]$loadings

## proportion of variance explained
SS.loadings <- apply(gatekeeping.FA.factor.loadings, 2, function(x) sum(x ^ 2)) / 
  sum(apply(gatekeeping.FA.factor.loadings, 2, function(x) sum(x ^ 2)))
round(SS.loadings, 3)

SS.loadings.order <- order(SS.loadings, decreasing = TRUE)

## factor scores
gatekeeping.FA.factor.scores <- factor.scores(topic.prop.matrix, 
                                              as.matrix(gatekeeping.FA.factor.loadings), 
                                              method = "Bartlett")$scores
rownames(gatekeeping.FA.factor.scores) <- newspaper.names

first.dim.score.order <- order(gatekeeping.FA.factor.scores[, SS.loadings.order[1]], decreasing = TRUE)
second.dim.score.order <- order(gatekeeping.FA.factor.scores[, SS.loadings.order[2]], decreasing = TRUE)

## draw the figure
cairo_pdf("Figure_A12.pdf",
          width = 10, height = 6, pointsize = 11, family = "Helvetica")
layout(matrix(c(1, 2), 1, 2))
par(mar = c(5, 1, 3, 1))
dotchart(gatekeeping.FA.factor.scores[first.dim.score.order, SS.loadings.order[1]], 
         pch = 19, xlim = c(-3, 2), xlab = "Estimated Scores")
mtext("First Dimension", line = 1, at = -0.5, font = 2, cex = 1.2)
dotchart(gatekeeping.FA.factor.scores[second.dim.score.order, SS.loadings.order[2]], 
         pch = 19, xlim = c(-3, 2), xlab = "Estimated Scores")
mtext("Second Dimension", line = 1, at = -0.5, font = 2, cex = 1.2)
dev.off()

## rank correlation coefficient between the first-dimension scores and ideal points estimated in Study 2
round(cor(gatekeeping.FA.factor.scores[, SS.loadings.order[1]], Study2.ideal.points.mean, method = "kendall"), 3)

#### topics whose absolute value of the factor loading is in the top ten (Table A.8) ####
# topic labels and factor loadings
topics <- c("Constitutional revision", "The 2017 general election", "North Korea", "Nuclear power plants", 
            "U.S.-Iran relationship", "Official document", "Diet deliberations", "Marine Corps Air Station Futenma", 
            "Prime minister's scandals", "National security", "Ministry of Finance", "Japan-Korea relationship", 
            "Local government", "International nuclear disarmament", "Criminal trial", "Tax and pension", 
            "China's diplomatic policy", "Trade", "Labor law", "Civil Code reform", "Broadcasting", "Syria", 
            "Integrated Resort Bill", "Employment", "Freedom of speech", "Financial policy", 
            "Israeli-Palestinian conflict", "Entrance examination reform", "National finance", 
            "Corporate governance", "Antismoking bill", "Electoral systems", "Bank", "Olympic Games", 
            "Nuclear fuel", "Nursery school", "Medicine", "Sumo", "Manufacturing scandals", 
            "Agriculture, forestry, and fisheries", "Cryptocurrency", "Child welfare", "Education", "History", 
            "State budget", "Industrial relations", "Chinese politics", "Electric power", "Culture", "MEXT", 
            "Immigration", "Harassment in sports", "Enormous rain disaster", "European politics", 
            "Employment of the disabled", "Science", "Japan-Russia relationship", "Crime", "Natural disaster", 
            "Regional revitalization", "Administrative lawsuit", "Imperial Household", "Computer game", 
            "Sports", "Traffic")
topic.labels <- topics[66 - rank(abs(factor.loadings.mean))]

## topics highly related to gatekeeping scores
# descending order of factor loadings for each dimension
first.dim.loading.order <- order(abs(gatekeeping.FA.factor.loadings[, SS.loadings.order[1]]), decreasing = TRUE)
second.dim.loading.order <- order(abs(gatekeeping.FA.factor.loadings[, SS.loadings.order[2]]), decreasing = TRUE)

# display the results
data.frame(topic = topic.labels[first.dim.loading.order[1:10]], 
           loading = round(gatekeeping.FA.factor.loadings[first.dim.loading.order[1:10], SS.loadings.order[1]], 2))

data.frame(topic = topic.labels[second.dim.loading.order[1:10]], 
           loading = round(gatekeeping.FA.factor.loadings[second.dim.loading.order[1:10], SS.loadings.order[2]], 2))